# This R Markdown file contains my solutions to ISLR chapter 9, exercises 7 and 8:
# analyzing support vector approaches.
# ISLR chapter 9, exercise 7: predict whether a car's gas mileage is above
# the median using support vector machines on the Auto data.
library(tidyverse)
library(openintro)
library(dplyr)   # already attached by tidyverse; kept for compatibility
library(ISLR2)
library(e1071)

# Binary response: 1 if mpg is above the sample median, 0 otherwise,
# stored as a factor so svm() performs classification.
mpg_median <- median(Auto$mpg)
Auto$aboveMedian <- ifelse(Auto$mpg > mpg_median, 1, 0)
Auto$aboveMedian <- as.factor(Auto$aboveMedian)

# 10-fold CV over a grid of cost values for a linear-kernel SVM.
set.seed(392)
mpg.tune1 <- tune(svm, aboveMedian ~ ., data = Auto, kernel = "linear",
                  ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100)),
                  scale = FALSE)
# Turn off scientific notation so the CV error table prints as plain decimals.
options(scipen = 999)

# Cross-validation error for each candidate value of cost.
summary(mpg.tune1)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.002564103
##
## - Detailed performance results:
## cost error dispersion
## 1 0.001 0.066346154 0.045395046
## 2 0.010 0.017948718 0.032093989
## 3 0.100 0.002564103 0.008108404
## 4 1.000 0.002564103 0.008108404
## 5 5.000 0.005128205 0.010811206
## 6 10.000 0.005128205 0.010811206
## 7 100.000 0.005128205 0.010811206
# Pull out the model refit at the CV-selected cost.
bestmod <- mpg.tune1$best.model
bestmod
##
## Call:
## best.tune(METHOD = svm, train.x = aboveMedian ~ ., data = Auto, ranges = list(cost = c(0.001,
## 0.01, 0.1, 1, 5, 10, 100)), kernel = "linear", scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.1
##
## Number of Support Vectors: 26
# The model with the lowest cross validation error is the model using cost = 0.1.

# Radial kernel: tune cost and gamma jointly with 10-fold CV.
# NOTE(review): there is no set.seed() before this tune() call, so the CV
# folds (and the transcript below) are not reproducible — consider seeding
# as was done for the linear kernel above.
svm.rad <- tune(svm, aboveMedian ~ ., data = Auto, kernel = "radial",
                ranges = list(cost = c(0.1, 1, 10, 100, 1000),
                              gamma = c(0.5, 1, 2, 3, 4)))
summary(svm.rad)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 0.5
##
## - best performance: 0.04833333
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 0.1 0.5 0.08153846 0.03944238
## 2 1.0 0.5 0.05083333 0.03551389
## 3 10.0 0.5 0.04833333 0.03674245
## 4 100.0 0.5 0.04833333 0.03674245
## 5 1000.0 0.5 0.04833333 0.03674245
## 6 0.1 1.0 0.56115385 0.04172656
## 7 1.0 1.0 0.06365385 0.03640019
## 8 10.0 1.0 0.06621795 0.03206275
## 9 100.0 1.0 0.06621795 0.03206275
## 10 1000.0 1.0 0.06621795 0.03206275
## 11 0.1 2.0 0.56115385 0.04172656
## 12 1.0 2.0 0.12961538 0.09085685
## 13 10.0 2.0 0.12198718 0.08397303
## 14 100.0 2.0 0.12198718 0.08397303
## 15 1000.0 2.0 0.12198718 0.08397303
## 16 0.1 3.0 0.56115385 0.04172656
## 17 1.0 3.0 0.46192308 0.09498492
## 18 10.0 3.0 0.42865385 0.13023887
## 19 100.0 3.0 0.42865385 0.13023887
## 20 1000.0 3.0 0.42865385 0.13023887
## 21 0.1 4.0 0.56115385 0.04172656
## 22 1.0 4.0 0.49743590 0.05338375
## 23 10.0 4.0 0.49487179 0.05453450
## 24 100.0 4.0 0.49487179 0.05453450
## 25 1000.0 4.0 0.49487179 0.05453450
# CV-selected radial model (cost = 10, gamma = 0.5).
best.rad <- svm.rad$best.model
best.rad
##
## Call:
## best.tune(METHOD = svm, train.x = aboveMedian ~ ., data = Auto, ranges = list(cost = c(0.1,
## 1, 10, 100, 1000), gamma = c(0.5, 1, 2, 3, 4)), kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 10
##
## Number of Support Vectors: 259
# The radial kernel model with the lowest cross validation error is when cost = 10 and gamma = 0.5
# Polynomial
# NOTE(review): libsvm treats the polynomial degree as an integer, so the
# degree = 0.5 entries are presumably coerced/truncated — that would explain
# the flat ~0.574 error for every degree = 0.5 row in the results below.
# TODO confirm against the e1071/libsvm documentation.
svm.poly <- tune(svm, aboveMedian ~ ., data = Auto, kernel = "polynomial", ranges = list(
cost = c(0.1, 1, 10, 100, 1000), degree = c(0.5, 1, 2, 3, 4)))
summary(svm.poly)##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 1000 1
##
## - best performance: 0.01538462
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.1 0.5 0.57391026 0.04052405
## 2 1.0 0.5 0.57391026 0.04052405
## 3 10.0 0.5 0.57391026 0.04052405
## 4 100.0 0.5 0.57391026 0.04052405
## 5 1000.0 0.5 0.57391026 0.04052405
## 6 0.1 1.0 0.17647436 0.08021337
## 7 1.0 1.0 0.08160256 0.03961477
## 8 10.0 1.0 0.06128205 0.03664788
## 9 100.0 1.0 0.02551282 0.02948687
## 10 1000.0 1.0 0.01538462 0.02477158
## 11 0.1 2.0 0.57391026 0.04052405
## 12 1.0 2.0 0.57391026 0.04052405
## 13 10.0 2.0 0.54570513 0.07823901
## 14 100.0 2.0 0.30583333 0.05153999
## 15 1000.0 2.0 0.25506410 0.04470916
## 16 0.1 3.0 0.57391026 0.04052405
## 17 1.0 3.0 0.57391026 0.04052405
## 18 10.0 3.0 0.57391026 0.04052405
## 19 100.0 3.0 0.34987179 0.08474145
## 20 1000.0 3.0 0.25762821 0.04184811
## 21 0.1 4.0 0.57391026 0.04052405
## 22 1.0 4.0 0.57391026 0.04052405
## 23 10.0 4.0 0.57391026 0.04052405
## 24 100.0 4.0 0.57391026 0.04052405
## 25 1000.0 4.0 0.56878205 0.03784893
# CV-selected polynomial model (cost = 1000, degree = 1).
best.poly <- svm.poly$best.model
best.poly
##
## Call:
## best.tune(METHOD = svm, train.x = aboveMedian ~ ., data = Auto, ranges = list(cost = c(0.1,
## 1, 10, 100, 1000), degree = c(0.5, 1, 2, 3, 4)), kernel = "polynomial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1000
## degree: 1
## coef.0: 0
##
## Number of Support Vectors: 49
# The polynomial kernel model with the lowest cross validation error is when
# cost = 1000 and degree = 1.

# BUG FIX: in the original, "l.fit <- svm(...)" was fused onto the end of the
# comment above and therefore never executed, so the plot() calls below would
# fail with "object 'l.fit' not found".
# Refit each kernel at its CV-chosen tuning parameters for plotting.
l.fit <- svm(aboveMedian ~ ., data = Auto, kernel = "linear", cost = 0.1)
r.fit <- svm(aboveMedian ~ ., data = Auto, kernel = "radial", cost = 10, gamma = 0.5)
p.fit <- svm(aboveMedian ~ ., data = Auto, kernel = "polynomial", cost = 1000, degree = 1)

# Plot each fitted SVM against mpg and a selection of predictors.
# (A loop replaces the original run of fifteen fused plot() calls; the plots
# are produced in the same order: per predictor, linear then radial then
# polynomial.)
for (pred in c("horsepower", "displacement", "origin", "year", "cylinders")) {
  fmla <- as.formula(paste("mpg ~", pred))
  plot(l.fit, Auto, fmla)
  plot(r.fit, Auto, fmla)
  plot(p.fit, Auto, fmla)
}

# Exercise 8(a): split the OJ data into 800 training and 270 test rows.
# nrow(OJ) == 1070, so this generalizes the original hard-coded constant.
set.seed(392)
train <- sample(nrow(OJ), 800)
training <- OJ[train, ]
testing <- OJ[-train, ]
# ?OJ for descriptions of the variables (the interactive help call "?OJ" was
# fused onto the subsetting line in the original).
# 8(b): linear-kernel SVM on the OJ training split with cost = 0.01.
oj.svm <- svm(Purchase ~ ., data = training, kernel = "linear",
              cost = 0.01, scale = FALSE)
summary(oj.svm)
##
## Call:
## svm(formula = Purchase ~ ., data = training, kernel = "linear", cost = 0.01,
## scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 598
##
## ( 300 298 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# When using linear kernel and cost = 0.01, there were 598 support vectors,
# 300 in one class and 298 in the other.

# BUG FIX: in the original, "ypred <- predict(oj.svm, training)" was fused
# onto the end of the comment above and never executed, so the table() call
# below would fail with "object 'ypred' not found".
# Training confusion matrix and error rate.
ypred <- predict(oj.svm, training)
table(predict = ypred, truth = training$Purchase)
## truth
## predict CH MM
## CH 477 178
## MM 25 120
train_error <- 1-(477+120)/800
train_error
## [1] 0.25375
# Training error is 0.25375

# Test confusion matrix and error rate for the same fit.
ypred <- predict(oj.svm, testing)
table(predict = ypred, truth = testing$Purchase)
## truth
## predict CH MM
## CH 142 81
## MM 9 38
test_error <- 1-(142+38)/270
test_error
## [1] 0.3333333
# The test error is 0.3333333.

# BUG FIX: in the original, this tune() call was fused onto the end of the
# comment above, so oj.tunel was never created.
# Tune the linear-kernel cost by 10-fold CV on the training data.
oj.tunel <- tune(svm, Purchase ~ ., data = training, kernel = "linear",
                 ranges = list(cost = c(0.01, 0.1, 1, 5, 10)), scale = FALSE)
summary(oj.tunel)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.17125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.28000 0.06129392
## 2 0.10 0.17625 0.03356689
## 3 1.00 0.17125 0.04291869
## 4 5.00 0.17375 0.04308019
## 5 10.00 0.18000 0.06072479
# Model refit at the CV-selected cost (cost = 1).
best.l <- oj.tunel$best.model
best.l
##
## Call:
## best.tune(METHOD = svm, train.x = Purchase ~ ., data = training,
## ranges = list(cost = c(0.01, 0.1, 1, 5, 10)), kernel = "linear",
## scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
##
## Number of Support Vectors: 341
# Refit the linear kernel at the CV-selected cost = 1.
# BUG FIX: the original line contained this exact statement twice, fused
# together on one line (a paste error that would not parse).
oj.svm1 <- svm(Purchase ~ ., data = training, kernel = "linear",
               cost = 1, scale = FALSE)
# NOTE(review): this set.seed() after the fit has no effect — predict() below
# is deterministic; kept only to preserve the original script's behavior.
set.seed(392)
ypred <- predict(oj.svm1, training)
# Training confusion matrix for the cost = 1 linear fit.
table(predict = ypred, truth = training$Purchase)## truth
## predict CH MM
## CH 450 74
## MM 52 224
# Training error = 1 - (correct predictions / n_train).
train_error <- 1-(450+224)/800
train_error## [1] 0.1575
# OJ Training error for cost = 1 linear kernel is 0.1575
# Test confusion matrix and error rate for the same fit.
ypred <- predict(oj.svm1, testing)
table(predict = ypred, truth = testing$Purchase)## truth
## predict CH MM
## CH 133 35
## MM 18 84
test_error <- 1-(133+84)/270
test_error## [1] 0.1962963
# OJ test error for cost = 1 linear kernel is 0.1962963
# b
# 8(b), radial: SVM with a radial kernel, cost = 0.01, default gamma.
oj.Radial <- svm(Purchase ~ ., data = training, kernel = "radial",
                 cost = 0.01, scale = FALSE)
summary(oj.Radial)
##
## Call:
## svm(formula = Purchase ~ ., data = training, kernel = "radial", cost = 0.01,
## scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.01
##
## Number of Support Vectors: 612
##
## ( 314 298 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# When using radial kernel and cost = 0.01 and default gamma, there were 612 support vectors, 314 in one class and 298 in the other.
# c
# NOTE(review): the tables below show this heavily-regularized fit predicts
# CH for every observation, so its error rate is simply the MM class
# proportion in each split — the model is degenerate at cost = 0.01.
ypred <- predict(oj.Radial, training)
table(predict = ypred, truth = training$Purchase)## truth
## predict CH MM
## CH 502 298
## MM 0 0
train_error <- 1-(502+0)/800
train_error## [1] 0.3725
# Training error is 0.3725
ypred <- predict(oj.Radial, testing)
table(predict = ypred, truth = testing$Purchase)## truth
## predict CH MM
## CH 151 119
## MM 0 0
test_error <- 1-(151+0)/270
test_error## [1] 0.4407407
# The test error is 0.4407407
# d
# Tune the radial-kernel cost by 10-fold CV on the training data.
# NOTE(review): no set.seed() before tune(), so the folds (and the transcript
# below) are not reproducible.
oj.tune2 <- tune(svm, Purchase ~ ., data = training, kernel = "radial", ranges = list(
cost=c(0.01, 0.1, 1, 5, 10)))
summary(oj.tune2)##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.17375
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.37250 0.04958158
## 2 0.10 0.18125 0.02960973
## 3 1.00 0.17375 0.03606033
## 4 5.00 0.18125 0.04299952
## 5 10.00 0.18875 0.04581439
# Model refit at the CV-selected cost (cost = 1).
best.r <- oj.tune2$best.model
best.r
##
## Call:
## best.tune(METHOD = svm, train.x = Purchase ~ ., data = training,
## ranges = list(cost = c(0.01, 0.1, 1, 5, 10)), kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 364
# e
# Refit the radial kernel at the CV-selected cost = 1 and evaluate it.
oj.svm2 <- svm(Purchase ~ ., data = training, kernel = "radial", cost = 1, scale = FALSE)
# NOTE(review): this set.seed() has no effect on the deterministic predict()
# calls below; kept to preserve the original script.
set.seed(392)
ypred <- predict(oj.svm2, training)
table(predict = ypred, truth = training$Purchase)
## truth
## predict CH MM
## CH 443 123
## MM 59 175
# BUG FIX: the original computed 1-(433+175)/800 — a mistyped 433 instead of
# the 443 correct CH predictions shown in the table above — which gave 0.24.
# The correct training error is 1 - 618/800 = 0.2275.
train_error <- 1-(443+175)/800
train_error
## [1] 0.2275
# Training error is 0.2275
ypred <- predict(oj.svm2, testing)
table(predict = ypred, truth = testing$Purchase)
## truth
## predict CH MM
## CH 123 57
## MM 28 62
test_error <- 1-(123+62)/270
test_error
## [1] 0.3148148
# Test error is 0.3148148
# b
# Polynomial-kernel SVM with cost = 0.01 and degree = 2.
# BUG FIX: the original fit on the full OJ data set rather than the training
# split, so the "training" and "test" errors computed afterwards leaked the
# held-out observations into the fit. Fit on the training split, consistent
# with the linear and radial sections above.
# NOTE(review): the summary transcript below was produced by the original
# data = OJ fit and its support-vector counts will differ after this fix.
oj.poly <- svm(Purchase ~ ., data = training, kernel = "polynomial",
               cost = 0.01, degree = 2)
summary(oj.poly)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ, kernel = "polynomial", cost = 0.01,
## degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 0.01
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 839
##
## ( 422 417 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# For a polynomial kernel with cost = 0.01 and degree = 2, there are 839 vectors with 422 belonging to one class and 417 belonging to the other
# c
# Training confusion matrix and error rate.
ypred <- predict(oj.poly, training)
# NOTE(review): this set.seed() after predict() has no effect — predict() is
# deterministic and nothing below uses the random number generator.
set.seed(392)
table(predict = ypred, truth = training$Purchase)## truth
## predict CH MM
## CH 497 280
## MM 5 18
train_error <- 1-(497 + 18)/800
train_error## [1] 0.35625
# Training error is 0.35625
# Test confusion matrix and error rate.
ypred <- predict(oj.poly, testing)
table(predict = ypred, truth = testing$Purchase)## truth
## predict CH MM
## CH 149 108
## MM 2 11
test_error <- 1-(149+11)/270
test_error## [1] 0.4074074
# The test error is 0.4074074
# d
# Tune cost and degree for the polynomial kernel by 10-fold CV.
# BUG FIX: tune on the training split rather than the full OJ data so that
# model selection never sees the held-out test observations (consistent with
# the linear and radial tuning above).
# NOTE(review): the CV transcript below came from the original data = OJ run;
# also, libsvm's degree is an integer, so degree = 0.5 is presumably
# truncated — see the flat ~0.39 error in those rows. This reuses the name
# svm.poly from exercise 7, clobbering that earlier tune object.
svm.poly <- tune(svm, Purchase ~ ., data = training, kernel = "polynomial",
                 ranges = list(cost = c(0.001, 0.1, 1, 10),
                               degree = c(0.5, 1, 2, 3, 4)))
summary(svm.poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 1
##
## - best performance: 0.1691589
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.001 0.5 0.3897196 0.05778795
## 2 0.100 0.5 0.3897196 0.05778795
## 3 1.000 0.5 0.3897196 0.05778795
## 4 10.000 0.5 0.3897196 0.05778795
## 5 0.001 1.0 0.3897196 0.05778795
## 6 0.100 1.0 0.1738318 0.02576458
## 7 1.000 1.0 0.1747664 0.03470411
## 8 10.000 1.0 0.1691589 0.03564222
## 9 0.001 2.0 0.3897196 0.05778795
## 10 0.100 2.0 0.3046729 0.05679701
## 11 1.000 2.0 0.1943925 0.03290994
## 12 10.000 2.0 0.1785047 0.03902172
## 13 0.001 3.0 0.3897196 0.05778795
## 14 0.100 3.0 0.2700935 0.05475250
## 15 1.000 3.0 0.1887850 0.03600793
## 16 10.000 3.0 0.1841121 0.03816676
## 17 0.001 4.0 0.3897196 0.05778795
## 18 0.100 4.0 0.3112150 0.05250862
## 19 1.000 4.0 0.2149533 0.03176959
## 20 10.000 4.0 0.1971963 0.03220949
# CV-selected polynomial model (cost = 10, degree = 1).
best.poly <- svm.poly$best.model
best.poly
##
## Call:
## best.tune(METHOD = svm, train.x = Purchase ~ ., data = OJ, ranges = list(cost = c(0.001,
## 0.1, 1, 10), degree = c(0.5, 1, 2, 3, 4)), kernel = "polynomial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 10
## degree: 1
## coef.0: 0
##
## Number of Support Vectors: 443
# e
# Refit the polynomial kernel at the CV-selected cost = 10, degree = 1.
# BUG FIX: fit on the training split — the original used the full OJ data,
# leaking the test set into the fit (the summary transcript below reflects
# that original data = OJ run).
oj.svm3 <- svm(Purchase ~ ., data = training, kernel = "polynomial",
               cost = 10, degree = 1)
summary(oj.svm3)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ, kernel = "polynomial", cost = 10,
## degree = 1)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 10
## degree: 1
## coef.0: 0
##
## Number of Support Vectors: 443
##
## ( 222 221 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# Training confusion matrix and error for the tuned polynomial model.
ypred <- predict(oj.svm3, training)
table(predict = ypred, truth = training$Purchase)## truth
## predict CH MM
## CH 446 68
## MM 56 230
train_error <- 1-(446+230)/800
train_error## [1] 0.155
# Training error is 0.155
# Test confusion matrix and error for the tuned polynomial model.
ypred <- predict(oj.svm3, testing)
table(predict = ypred, truth = testing$Purchase)## truth
## predict CH MM
## CH 132 31
## MM 19 88
test_error <- 1-(132+88)/270
test_error## [1] 0.1851852
# Test error is 0.1851852.
# Overall, it appears that using a polynomial kernel is the best approach.
# Cross validation showed that using cost = 10 and degree = 1 was the best
# choice for the polynomial model. This model had the overall lowest test and
# training error rates in comparison to the linear and radial kernels.
…